from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests
import time
import re
def getHtml(url):
    """Download *url* and return the response body decoded as UTF-8.

    The request is sent with a fixed browser-like ``User-Agent`` and a
    hard-coded ``cookie`` header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text on success, or the sentinel string ``"读取失败"``
        when the request fails (connection error, timeout, or a non-2xx
        status reported by ``raise_for_status``).
    """
    # NOTE(review): the cookie looks like it was copied from a browser
    # session and may have expired - confirm it is still required.
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362",
        "cookie": "mfw_uuid=5ed3bbc5-867c-7747-be1b-04fb2a8751e9; oad_n=a%3A3%3A%7Bs%3A3%3A%22oid%22%3Bi%3A1029%3Bs%3A2%3A%22dm%22%3Bs%3A15%3A%22www.mafengwo.cn%22%3Bs%3A2%3A%22ft%22%3Bs%3A19%3A%222020-06-01+23%3A43%3A59%22%"
    }
    try:
        # Without a timeout a stalled server would block the script forever.
        r = requests.get(url, headers=header, timeout=10)
        r.raise_for_status()
        r.encoding = "utf-8"
        return r.text
    except requests.RequestException:
        # Only request-level failures map to the sentinel; KeyboardInterrupt
        # and programming errors are no longer swallowed.
        return "读取失败"
def getdata(u):
    """Scrape one listing page and return its entries as a list of dicts.

    The page at *u* is fetched with ``getHtml`` and every
    ``<li class="item clearfix">`` element is turned into one record with
    the keys:

    * ``'门店名称'`` - stripped text of the entry's ``<h3>``
    * ``'评分'`` - text of the ``<em>`` inside ``<div class="grade">``
    * ``'评论数'`` - text of ``<p class="rev-num">`` up to the first ``'条'``

    Entries without an ``<h3>`` are skipped. A missing score or review
    count is stored as ``None`` rather than raising, so one incomplete
    entry no longer discards the whole page.

    Args:
        u: URL of the listing page.

    Returns:
        A list of dicts, one per parsed entry; empty when the page has no
        matching ``<li>`` elements (which includes the failure text that
        ``getHtml`` returns).
    """
    html = getHtml(u)
    soup = BeautifulSoup(html, "lxml")
    records = []
    for item in soup.find_all('li', class_='item clearfix'):
        title = item.find('h3')
        if title is None:
            # Nothing to identify the entry by; skip it.
            continue
        # find() returns None when the node is absent, so each level is
        # checked before descending.
        grade = item.find('div', class_='grade')
        score = grade.find('em') if grade is not None else None
        rev_num = grade.find('p', class_='rev-num') if grade is not None else None
        records.append({
            '门店名称': title.text.strip(),
            '评分': score.text if score is not None else None,
            '评论数': rev_num.text.split('条')[0] if rev_num is not None else None,
        })
    return records
def main(city_id="10140", pages=range(1, 21),
         out_path=r"C:/Users/gcc/Desktop/wuxiff.csv"):
    """Scrape a range of listing pages and write the records to a CSV file.

    With the defaults this behaves as before: pages 1-20 of listing
    ``10140`` are written to ``wuxiff.csv``. An earlier, commented-out
    variant of this code used listing ``10207`` with ``suzhouf.csv``; that
    run is now expressed as
    ``main(city_id="10207", out_path=r"C:/Users/gcc/Desktop/suzhouf.csv")``.

    Args:
        city_id: Identifier placed in the ``/cy/<id>/`` part of the URL.
        pages: Iterable of page numbers to fetch.
        out_path: Destination of the CSV file (UTF-8, no index column).
    """
    data = []
    for page in pages:
        url = "https://www.mafengwo.cn/cy/" + str(city_id) + "/0-0-0-0-0-1-" + str(page) + ".html"
        try:
            data.extend(getdata(url))
        except Exception as err:
            # Best effort: a broken page must not abort the run, but the
            # failure is reported instead of being silently dropped.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            print("skipping {}: {!r}".format(url, err))
        # Pause between requests even after a failure, so errors do not
        # turn into back-to-back requests.
        time.sleep(0.3)
    p = pd.DataFrame(data)
    p.to_csv(out_path, encoding="utf-8", index=False)